from collections import Counter

import jieba

# Add these titles to jieba's dictionary so they can be segmented as single
# words.
for title in ("二姑娘", "三姑娘", "四姑娘", "林姑娘", "史姑娘", "邢姑娘", "琴姑娘"):
    jieba.add_word(title)

# Words that are left out of the printed ranking.
excludes = {"什么", "一个", "我们", "那里", "你们", "如今", "说道", "知道", "起来", "姑娘", "这里", "出来", "他们", "众人", "自己",
            "一面", "太太", "只见", "怎么", "奶奶", "两个", "没有", "不是", "不知", "这个", "听见", "这样", "进来", "咱们", "告诉",
            "就是", "东西", "回来", "只是", "大家", "老爷", "只得", "丫头", "这些", "不敢", "出去", "所以", "不过", "的话", "不好",
            "姐姐", "一时", "不能", "过来", "心里", "二爷", "如此", "今日", "银子", "几个", "答应", "二人", "还有", "只管", "这么",
            "说话", "一回", "那边", "这话", "外头", "打发", "自然", "今儿", "罢了", "屋里", "那些", "听说", "小丫头", "如何", "问道",
            "看见", "妹妹", "人家", "不用", "媳妇"}

# Each group lists names that are counted as one entry. The first name in a
# group is the one that is kept and reported; the others are folded into it.
same = [
    ["贾母", "老太太"],
    ["凤姐", "凤姐儿"],
    ["黛玉", "林黛玉", "林姑娘"],
    ["探春", "三姑娘"]
]

# Number of ranking lines to print.
TOP_N = 20

# The context manager closes the file even if reading or segmentation raises.
with open("红楼梦.txt", "r", encoding='UTF-8') as f:
    words = jieba.lcut(f.read())

# Count every token longer than one character; single-character tokens are
# skipped.
counts = Counter(word for word in words if len(word) > 1)

# Fold alias counts into the first name of each group. An alias that never
# occurs in the segmented text is simply skipped instead of raising KeyError,
# and a first name that never occurs starts from zero.
for canonical, *aliases in same:
    for alias in aliases:
        if alias in counts:
            counts[canonical] += counts.pop(alias)

# most_common() orders by descending count and keeps first-seen order among
# ties; excluded words are dropped before taking the top TOP_N.
items = [(word, count) for word, count in counts.most_common()
         if word not in excludes]

for word, count in items[:TOP_N]:
    print(f"{word:<10}{count:>5}")
